
################################################################################
##                   Barberia et al. (2022)                                   
##                   APSR Replication Code For:                               
##  "The Relationship between Ideology and COVID-19 Deaths: What We Know and  
##                   What We Still Need to Know"                              
##                                                                            
##                    Figures 1 thru 8                                        
## Instructions:
## In sections 1. and 2. are the details for generating the data used for the
## plots: "votes_for_bolsonaro_2018.rds" and "area_pop_covid_mun_20220327.rds"
##
## To generate Figures 1 to 6, use the "votes_for_bolsonaro_2018.rds" dataset
## Figures 7 and 8 use "area_pop_covid_mun_20220327.rds"
################################################################################

## Packages

library(tidyverse)
library(sf)
library(RColorBrewer)
library(scales)
library(httr)


## Set the directory

setwd("Vacinação e discursos")

## Set the Font

windowsFonts("Cambria" = windowsFont("Cambria"))

# 1. Generating Data used in the figures (votes_for_bolsonaro_2018.rds AND
# area_pop_covid_mun_20220327.rds)----------------------------------------

# 
## 1.1. Candidates ---------------------------------------------------------

## Data URL 

url <- "https://cdn.tse.jus.br/estatistica/sead/odsele/consulta_cand/consulta_cand_2018.zip"

## Download the data

download.file(url = url,
              destfile = "candidates_pres_2018.zip")

## Unzip the data

unzip(zipfile = "candidates_pres_2018.zip",
      exdir = "data/input/candidates")

## Exclude the zip file

file.remove("candidates_pres_2018.zip")

## List of the unziped csv's

list_data <- list.files(path = "data/input/candidates",
                        pattern = "consulta_cand_2018", 
                        full.names = TRUE)

## Remove BR file of the list

list_data <- list_data[c(-6)]

## Exclude unnecessary files

for(i in list_data){

  cat("Deleting", i, "\n")

  file.remove(i)

}

## Load the BR file

candidates <- read.table(file = "data/input/candidates/consulta_cand_2018_BR.csv",
                         header = TRUE,
                         sep = ";",
                         stringsAsFactors = FALSE)

## Selecting only the necessary variables

 candidates <- candidates %>%
  select(ANO_ELEICAO,
         NM_TIPO_ELEICAO,
         NR_TURNO,
         TP_ABRANGENCIA,
         CD_CARGO,
         DS_CARGO,
         NR_CANDIDATO,
         NM_CANDIDATO,
         NM_URNA_CANDIDATO,
         NM_SOCIAL_CANDIDATO,
         NR_CPF_CANDIDATO,
         NR_TITULO_ELEITORAL_CANDIDATO,
         DS_DETALHE_SITUACAO_CAND,
         TP_AGREMIACAO,
         NR_PARTIDO,
         SG_PARTIDO,
         SQ_COLIGACAO,
         NM_COLIGACAO,
         DS_COMPOSICAO_COLIGACAO,
         SG_UF_NASCIMENTO,
         NM_MUNICIPIO_NASCIMENTO,
         DT_NASCIMENTO,
         DS_GENERO,
         DS_COR_RACA,
         DS_GRAU_INSTRUCAO,
         DS_OCUPACAO,
         DS_SIT_TOT_TURNO)

## Save the data

saveRDS(candidates,
        "data/input/candidates/candidates_pres_2018.rds")

## Read the data

#candidates <- readRDS("data/input/candidates/candidates_pres_2018.rds")

## 1.2. Votes --------------------------------------------------------------

url <- "https://cdn.tse.jus.br/estatistica/sead/odsele/votacao_candidato_munzona/votacao_candidato_munzona_2018.zip"

## Download the data

download.file(url = url,
              destfile = "votes_pres_2018.zip")

## Unzip the data

unzip(zipfile = "votes_pres_2018.zip",
      exdir = "data/input/votes")

## Exclude the zip file

file.remove("votes_pres_2018.zip")

## List of the unziped csv's

list_data <- list.files(path = "data/input/votes",
                        pattern = "votacao_candidato_munzona_2018", 
                        full.names = TRUE)

## Remove BR file of the list

list_data <- list_data[c(-6)]

## Exclude unnecessary files

for(i in list_data){
  
  cat("Deleting", i, "\n")
  
  file.remove(i)
  
}

## Load the BR file

votes <- read.table(file = "data/input/votes/votacao_candidato_munzona_2018_BR.csv",
                    header = TRUE,
                    sep = ";",
                    stringsAsFactors = FALSE)

## Selecting only the necessary variables
## and grouping the results by municipality

votes <- votes %>%
  select(ANO_ELEICAO,
         NR_TURNO,
         SG_UF,
         CD_MUNICIPIO,
         NM_MUNICIPIO,
         CD_CARGO,
         NR_CANDIDATO,
         NM_CANDIDATO,
         NR_PARTIDO,
         QT_VOTOS_NOMINAIS) %>%
  group_by(ANO_ELEICAO,
           NR_TURNO,
           SG_UF,
           CD_MUNICIPIO,
           NM_MUNICIPIO,
           CD_CARGO,
           NR_CANDIDATO,
           NM_CANDIDATO,
           NR_PARTIDO) %>%
  summarise(QT_VOTOS = sum(QT_VOTOS_NOMINAIS))

## Save the data

saveRDS(votes,
        "data/input/votes/votes_pres_2018.rds")

## Read the data

#votes <- readRDS("data/input/votes/votes_pres_2018.rds")

## 1.3. Election Summary ---------------------------------------------------

url <- "https://cdn.tse.jus.br/estatistica/sead/odsele/detalhe_votacao_munzona/detalhe_votacao_munzona_2018.zip"

## Download the data

download.file(url = url,
              destfile = "election_summary_pres_2018.zip")

## Unzip the data

unzip(zipfile = "election_summary_pres_2018.zip",
      exdir = "data/input/election summary")

## Exclude the zip file

file.remove("election_summary_pres_2018.zip")

## List of the unziped csv's

list_data <- list.files(path = "data/input/election summary",
                        pattern = "detalhe_votacao_munzona_2018", 
                        full.names = TRUE)

## Remove BR file of the list

list_data <- list_data[c(-6)]

## Exclude unnecessary files

for(i in list_data){
  
  cat("Deleting", i, "\n")
  
  file.remove(i)
  
}

## Load the BR file

elsummary <- read.table(file = "data/input/election summary/detalhe_votacao_munzona_2018_BR.csv",
                        header = TRUE,
                        sep = ";",
                        stringsAsFactors = FALSE)

## Selecting only the necessary variables
## and grouping the results by municipality

elsummary <- elsummary %>%
  select(ANO_ELEICAO,
         NR_TURNO,
         SG_UF,
         CD_MUNICIPIO,
         NM_MUNICIPIO,
         CD_CARGO,
         QT_APTOS,
         QT_COMPARECIMENTO,
         QT_ABSTENCOES,
         QT_VOTOS_NOMINAIS,
         QT_VOTOS_BRANCOS,
         QT_VOTOS_NULOS,
         QT_VOTOS_LEGENDA,
         QT_VOTOS_ANULADOS) %>%
  group_by(ANO_ELEICAO,
           NR_TURNO,
           SG_UF,
           CD_MUNICIPIO,
           NM_MUNICIPIO,
           CD_CARGO) %>%
  summarise(QT_APTOS = sum(QT_APTOS),
            QT_COMPARECIMENTO = sum(QT_COMPARECIMENTO),
            QT_ABSTENCOES = sum(QT_ABSTENCOES),
            QT_VOTOS_NOMINAIS = sum(QT_VOTOS_NOMINAIS),
            QT_VOTOS_BRANCOS = sum(QT_VOTOS_BRANCOS),
            QT_VOTOS_NULOS = sum(QT_VOTOS_NULOS),
            QT_VOTOS_LEGENDA = sum(QT_VOTOS_LEGENDA),
            QT_VOTOS_ANULADOS = sum(QT_VOTOS_ANULADOS))

## Save the data

saveRDS(elsummary,
        "data/input/election summary/election summary_pres_2018.rds")

## Read the data

#elsummary <- readRDS("data/input/election summary/election summary_pres_2018.rds")

## Calculate the total of valid votes 
## for each city and round

elsummary$`VOTOS VÁLIDOS` <- (elsummary$QT_VOTOS_NOMINAIS + elsummary$QT_VOTOS_LEGENDA) - elsummary$QT_VOTOS_ANULADOS

## 1.4. Cases and Deaths by Covid-19 ---------------------------------------

## Data URL

url <- "https://data.brasil.io/dataset/covid19/caso_full.csv.gz" 

## Temp file where data will be stored

temp <- tempfile()

## Download the data

download.file(url = url, 
              destfile = temp) 

## Create a connection

gzfile(temp, 
       'rt')

## Load the data

covid <- read.csv(temp,
                  stringsAsFactors = FALSE,
                  encoding = "UTF-8")

## Aggregate by city and filter
## the data until 2022-03-27

covid <- covid %>% 
  filter(place_type == "city" &
         date == "2022-03-27") %>%
  drop_na(city_ibge_code)

## Create new variables

covid <- covid %>%
  mutate(last_available_deaths_per_100k_inhabitans = (last_available_deaths/estimated_population) * 100000,
         new_confirmed_per_100k = (new_confirmed / estimated_population) * 100000,
         new_deaths_per_100k = (new_deaths / estimated_population) * 100000,
         city_ibge_code = as.character(city_ibge_code),
         city_ibge_code6 = substr(city_ibge_code,
                                  1, 
                                  nchar(city_ibge_code)-1)) %>% 
  select(-c(place_type,
            order_for_place,
            last_available_confirmed,
            last_available_deaths,
            last_available_death_rate,
            is_last,
            is_repeated,
            new_confirmed,
            new_deaths,
            estimated_population_2019,
            last_available_date))
##Creating variable `category_pop` based on population size
covid <- covid %>% ungroup() %>%
  mutate(
    category_pop = case_when(
      estimated_population > 500000 ~ "Above 500,000",
      estimated_population >= 100001 &
        estimated_population <= 500000 ~ "100,001 to 500,000",
      estimated_population >= 50001 &
        estimated_population <= 100000 ~ "50,001 to 100,000",
      estimated_population >= 20001 &
        estimated_population <= 50000 ~ "20,001 to 50,000",
      estimated_population >= 10001 &
        estimated_population <= 20000 ~ "10,001 to 20,000",
      estimated_population >= 5001 &
        estimated_population <= 10000 ~ "5,001 to 10,000",
      T ~ "Up to 5,000"
    )
  )
covid$category_pop <-
  factor(
    covid$category_pop,
    levels = c(
      "Up to 5,000",
      "5,001 to 10,000",
      "10,001 to 20,000",
      "20,001 to 50,000",
      "50,001 to 100,000",
      "100,001 to 500,000",
      "Above 500,000"
    )
  )

## Getting number of municipality of each group
covid <- covid %>% group_by(category_pop)%>% mutate(n_category_pop = n())



## Save the data

saveRDS(covid,
        "data/input/cases_and_deaths_by_covid_20220327.rds")

## Load the file

#covid <- readRDS("data/input/cases_and_deaths_by_covid_20220327.rds")




## 1.5. Estimated Population by Municipality  ------------------------------

## Reference: https://www.ibge.gov.br/estatisticas/sociais/populacao/9103-estimativas-de-populacao.html?=&t=resultados

## Download Brazil estimated population 
## by municipality in 2021 (IBGE/2021-07-01)

download.file(url = "https://ftp.ibge.gov.br/Estimativas_de_Populacao/Estimativas_2021/POP2021_20220419.xls",
              destfile = "data/input/estimated_population_2021_ibge.xls",
              mode = "wb")

## Load the file

estimatedpop <- readxl::read_xls("data/input/estimated_population_2021_ibge.xls",
                                 sheet = 2,
                                 skip =  1,
                                 n_max = 5570)

## Create a column for IBGE Municipality Code
## and rename the columns

estimatedpop <- estimatedpop %>% 
  mutate(city_ibge_code = paste0(`COD. UF`,
                           `COD. MUNIC`),
         city_ibge_code6 = substr(city_ibge_code,
                                  1, 
                                  nchar(city_ibge_code)-1),
         `POPULAÇÃO ESTIMADA` = gsub(r"{\s*\([^\)]+\)}",
                                     "",
                                     `POPULAÇÃO ESTIMADA`),
         `POPULAÇÃO ESTIMADA` = gsub("\\.",
                                     "",
                                     `POPULAÇÃO ESTIMADA`)) %>% 
  rename("state" = "UF",
         "name_munic" = "NOME DO MUNICÍPIO",
         "estimated_population" = "POPULAÇÃO ESTIMADA") %>% 
  select(state,
         city_ibge_code,
         city_ibge_code6,
         name_munic,
         estimated_population)

## 1.6. Estimated Population by Municipality and Age -----------------------

## Reference: http://tabnet.datasus.gov.br/cgi/deftohtm.exe?popsvs/cnv/popbr.def

## Download Brazil estimated population 
## by municipality and Age in 2021 (DATASUS/2021)

download.file(url = "http://tabnet.datasus.gov.br/csv/A143850189_28_143_208.csv",
              destfile = "data/input/estimated_population_by_age_2021_datasus.csv",
              mode = "wb")

## Load the file

estimatedpopage <- read_csv2("data/input/estimated_population_by_age_2021_datasus.csv",
                            skip =  3,
                            n_max = 5570,
                            locale = locale(encoding = "latin1"))

## Create a column for IBGE Municipality Code
## and remove unnecessary columns

estimatedpopage <- estimatedpopage %>% 
  separate(Município, into = c('city_ibge_code6', 
                               'name_munic'), sep = 7) %>% 
  mutate(city_ibge_code6 = trimws(city_ibge_code6),
         name_munic = trimws(name_munic),
         id = as.factor(1:5570)) %>% 
  rename("0-4 years" = "De 0 a 4 anos",
         "5-9 years" = "De 5 a 9 anos",
         "10-14 years" = "De 10 a 14 anos",
         "15-19 years" = "De 15 a 19 anos",
         "20-24 years" = "De 20 a 24 anos",
         "25-29 years" = "De 25 a 29 anos",
         "30-34 years" = "De 30 a 34 anos",
         "35-39 years" = "De 35 a 39 anos",
         "40-44 years" = "De 40 a 44 anos",
         "45-49 years" = "De 45 a 49 anos",
         "50-54 years" = "De 50 a 54 anos",
         "55-59 years" = "De 55 a 59 anos",
         "60-64 years" = "De 60 a 64 anos",
         "65-69 years" = "De 65 a 69 anos",
         "70-74 years" = "De 70 a 74 anos",
         "75-79 years" = "De 75 a 79 anos",
         ">= 80 years" = "De 80 anos ou mais") %>% 
  select(-Total)

## Reshape the data

estimatedpopage <- estimatedpopage %>% 
  gather(age, 
         estimated_population, 
         names(estimatedpopage)[3:19], 
         factor_key = TRUE) %>% 
  select(-id)

## Create a new variable 

estimatedpopage <- estimatedpopage %>% 
  mutate("age_recoded" = ifelse(age == "0-4 years" |
                                age == "5-9 years" |
                                age == "10-14 years" |
                                age == "15-19 years",
                                "0-19 years",
                                ifelse(age == "20-24 years" |
                                       age == "25-29 years" |
                                       age == "30-34 years" |
                                       age == "35-39 years" |
                                       age == "40-44 years" |
                                       age == "45-49 years" |
                                       age == "50-54 years" |
                                       age == "55-59 years",
                                       "20-59 years",
                                       ifelse(age == "60-64 years" |
                                              age == "65-69 years" |
                                              age == "70-74 years" |
                                              age == "75-79 years" |
                                              age == ">= 80 years",
                                              ">= 60 years",
                                              NA)))) %>% 
  select(city_ibge_code6,
         age_recoded,
         estimated_population) %>% 
  unique()

## Aggregate the results by age and municipality

estimatedpopage <- estimatedpopage %>% 
  group_by(city_ibge_code6, 
           age_recoded) %>% 
  summarise(estimated_population_by_age = sum(estimated_population)) %>% 
  mutate(estimated_population_by_age = gsub(r"{\s*\([^\)]+\)}",
                                            "",
                                            estimated_population_by_age))

## 1.7. IBGE Data ----------------------------------------------------------
## 1.7.1 states and country regions divisions
## Download IBGE data

infostate <- geobr::read_state(year = 2018,
                               simplified = TRUE)

## Remove unnecessary columns

infostate <- infostate %>% 
  select(abbrev_state,
         name_region) %>% 
  st_drop_geometry() %>% 
  rename("state" = "abbrev_state",
         "region" = "name_region")

## 1.7.2 Municipality Areas
GET("https://geoftp.ibge.gov.br/organizacao_do_territorio/estrutura_territorial/areas_territoriais/2020/AR_BR_RG_UF_RGINT_RGIM_MES_MIC_MUN_2020.xls", write_disk(temp_file <- tempfile(fileext = ".xls")))

area <- readxl::read_xls(temp_file)
## 1.8. TSE Code -----------------------------------------------------------

## Reference: CEPESP DATA (https://cepespdata.io/documentacao#Tabelas_Auxiliares_-_Downloads)

## Download equivalency between IBGE and TSE Code of Municipalities

download.file(url = "http://cepespdata.io/static/docs/cod_municipios.csv",
              destfile = "data/input/cod_munic.csv",
              mode = "wb")

## Load the file

cod_munic <- read_delim("data/input/cod_munic.csv", 
                        delim = ";", 
                        escape_double = FALSE, 
                        col_types = cols(...1 = col_skip(), 
                                         ANO_ELEICAO = col_skip()), 
                        trim_ws = TRUE)

## Rename the columns 

cod_munic <- cod_munic %>% 
  select(-NOME_MUNICIPIO) %>% 
  rename("CD_MUNICIPIO" = "COD_MUN_TSE",
         "city_ibge_code" = "COD_MUN_IBGE",
         "SG_UF" = "UF") %>% 
  mutate(city_ibge_code6 = substr(city_ibge_code,
                                  1, 
                                  nchar(city_ibge_code)-1)) %>% 
  unique()  

# 2. Join -----------------------------------------------------------------

## 2.1 Joinning the summary of election with the data
## about the candidates

elections18 <- left_join(elsummary,
                         candidates)

## Joinning data about the candidates and their respectives votes

elections18 <- left_join(elections18, 
                         votes)

## Joinning the data with IBGE Code

elections18 <- left_join(elections18,
                         cod_munic)

## Calculate the proportion of votes for
## each candidate and remove unnecessary
## columns

elections18 <- elections18 %>% 
  mutate("PROPORÇÃO DE VOTOS RECEBIDOS" = round(QT_VOTOS/`VOTOS VÁLIDOS`, 4),
         city_ibge_code = as.character(city_ibge_code),
         NM_MUNICIPIO = str_to_upper(NM_MUNICIPIO)) %>% 
  select(ANO_ELEICAO,
         NR_TURNO,
         SG_UF,
         city_ibge_code,
         city_ibge_code6,
         CD_MUNICIPIO,
         NM_MUNICIPIO,
         DS_CARGO,
         NM_CANDIDATO,
         NM_URNA_CANDIDATO,
         NR_CANDIDATO,
         SG_PARTIDO,
         DS_COMPOSICAO_COLIGACAO,
         `VOTOS VÁLIDOS`,
         QT_VOTOS,
         `PROPORÇÃO DE VOTOS RECEBIDOS`,
         DS_SIT_TOT_TURNO) %>% 
  arrange(SG_UF,
          NM_MUNICIPIO,
          NR_TURNO)

## Joinning with the population data

elections18 <- left_join(elections18,
                         estimatedpop)

## Joinning with the data of population by age 

elections18 <- left_join(elections18,
                         estimatedpopage)

## Joinning with the IBGE data

elections18 <- left_join(elections18,
                         infostate)

## Joinning with the Covid data

elections18 <- left_join(elections18,
                         covid)

## Create a data frame for bolsonaro

bolsonaro <- elections18 %>% 
  filter(NM_CANDIDATO == "JAIR MESSIAS BOLSONARO")

## Calculates the ratio of an age group to the total population

bolsonaro <- bolsonaro %>% 
  mutate(estimated_population = as.numeric(estimated_population),
         estimated_population_by_age = as.numeric(estimated_population_by_age),
         "PREDOMINÂNCIA DA FAIXA ETÁRIA" = round(estimated_population_by_age/estimated_population, 4),
         "GRAU DE PREDOMINÂNCIA" = ifelse(`PREDOMINÂNCIA DA FAIXA ETÁRIA` <= 0.20,
                                          "0-20%",
                                          ifelse(`PREDOMINÂNCIA DA FAIXA ETÁRIA` > 0.20 &
                                                   `PREDOMINÂNCIA DA FAIXA ETÁRIA` <= 0.40,
                                                 "20-40%",
                                                 ifelse(`PREDOMINÂNCIA DA FAIXA ETÁRIA` > 0.40 &
                                                          `PREDOMINÂNCIA DA FAIXA ETÁRIA` <= 0.60,
                                                        "40-60%",
                                                        ifelse(`PREDOMINÂNCIA DA FAIXA ETÁRIA` > 0.60 &
                                                                 `PREDOMINÂNCIA DA FAIXA ETÁRIA` <= 0.80,
                                                               "60-80%",
                                                               ifelse(`PREDOMINÂNCIA DA FAIXA ETÁRIA` >= 0.80,
                                                                      "80-100%", 
                                                                      NA)))))) %>% 
  mutate(`GRAU DE PREDOMINÂNCIA` = factor(`GRAU DE PREDOMINÂNCIA`,
                                          levels = c("0-20%",
                                                     "20-40%",
                                                     "40-60%",
                                                     "60-80%",
                                                     "80-100%")))

## Rename and order the columns

bolsonaro <- bolsonaro %>% 
  mutate(region = str_to_upper(region)) %>% 
  select(ANO_ELEICAO,
         NR_TURNO,
         region,
         SG_UF,
         CD_MUNICIPIO,
         city_ibge_code,
         city_ibge_code6,
         NM_MUNICIPIO,
         DS_CARGO,
         NM_CANDIDATO,
         SG_PARTIDO,
         DS_COMPOSICAO_COLIGACAO,
         `VOTOS VÁLIDOS`,
         QT_VOTOS,
         `PROPORÇÃO DE VOTOS RECEBIDOS`,
         DS_SIT_TOT_TURNO,
         estimated_population,
         age_recoded,
         estimated_population_by_age,
         date,
         epidemiological_week,
         last_available_confirmed_per_100k_inhabitants,
         last_available_deaths_per_100k_inhabitans,
         new_confirmed_per_100k,
         new_deaths_per_100k,
         `PREDOMINÂNCIA DA FAIXA ETÁRIA`,
         `GRAU DE PREDOMINÂNCIA`) %>% 
  rename("year" = "ANO_ELEICAO",
         "round" = "NR_TURNO",
         "state" = "SG_UF",
         "city_tse_code" = "CD_MUNICIPIO",
         "city" = "NM_MUNICIPIO",
         "position" = "DS_CARGO",
         "candidate" = "NM_CANDIDATO",
         "party" = "SG_PARTIDO",
         "coalition" = "DS_COMPOSICAO_COLIGACAO",
         "valid_votes" = "VOTOS VÁLIDOS",
         "total_votes" = "QT_VOTOS",
         "prop_votes" = "PROPORÇÃO DE VOTOS RECEBIDOS",
         "outcome" = "DS_SIT_TOT_TURNO",
         "prop_age_group" = "PREDOMINÂNCIA DA FAIXA ETÁRIA",
         "prop_age_group_recoded" = "GRAU DE PREDOMINÂNCIA")

## Save the data

saveRDS(bolsonaro,
        "data/output/votes_for_bolsonaro_2018.rds")

## 2.2 Joining Covid and area datasets
## joining with population data and generating density values
area_pop <-
  covid %>% left_join(., area, by = c("city_ibge_code" = "CD_GCMUN"))
area_pop$density <-
  area_pop$estimated_population / area_pop$AR_MUN_2020

## creating quartile variable
area_pop$quartile_dens <-
  cut(
    area_pop$density,
    breaks = quantile(area_pop$density, probs = seq(0, 1, 0.25)),
    include.lowest = TRUE,
    labels = c("0.04 to 11.6","11.7 to 25.1","25.2 to 56.5","56.6 to 14,403")
  )

## selecting desired variables
area_pop <-  area_pop %>% select(city,city_ibge_code,date,estimated_population,
                                 last_available_confirmed_per_100k_inhabitants,last_available_deaths_per_100k_inhabitans,
                                 category_pop,n_category_pop,AR_MUN_2020,density,quartile_dens)
# Save the data
saveRDS(area_pop,
        "data/output/area_pop_covid_mun_20220327.rds")


# 3. Figures --------------------------------------------------------------
## Figures 1 thru 6 uses the votes_for_bolsonaro_2018.rds data frame
## Figures 7 and 8 uses the area_pop_covid_mun_20220327.rds data frame



## 3.1. Figure 1 -----------------------------------------------------------

## Data frame with only the first round results

# bolsonaro <- readRDS("data/output/votes_for_bolsonaro_2018.rds")

firstround <- bolsonaro %>%
  ungroup() %>% 
  filter(round == 1) %>% 
  rename("first round" = "prop_votes") %>% 
  select(-round) 

## Plot the graph

figure1 <- firstround %>% 
  filter(state != "ZZ") %>% 
  select(`first round`,
         region,
         state,
         city) %>% 
  unique() %>%
  ggplot(aes(x = `first round` * 100))+
  geom_histogram(
    aes(y = (..count..)/sum(..count..) * 100),
    bins = 30,
    fill = "#3d405b",
    colour = "black") +
  scale_x_continuous(breaks = seq(0,100,20),
                     limits = c(0,101),
                     expand = c(0, 0))+
  scale_y_continuous(
    breaks = seq(0,8,2),
    limits = c(0,8.2),
    expand = c(0, 0)) +
  labs(x = "% of votes for Bolsonaro (1st round, 2018)",
       y = "Percent") +
  theme_minimal()+
  theme(legend.position = "bottom",
        plot.margin = unit(c(1,1,1,1.2),"cm"),
        plot.title = element_text(hjust = 0.5, 
                                  family = "Cambria",
                                  size = 11),
        plot.caption = element_text(family = "Cambria",
                                    size = 11),
        panel.grid.major = element_line(colour = "grey80"), 
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "grey80"),
        axis.ticks = element_line(colour = "grey80",),
        axis.title = element_text(family = "Cambria",
                                  size = 11,
                                  margin = unit(c(10, 0, 0, 0), "mm")),
        axis.text = element_text(family = "Cambria",
                                 size = 11,
                                 margin = unit(c(0, 0, 3, 0), "mm")),
        panel.border = element_blank(),
        legend.title = element_text(family = "Cambria",
                                    size = 11),
        legend.text = element_text(family = "Cambria",
                                   size = 11)) + 
  annotate("segment",
           x = c(0.3901 * 100, 
                 46.03),
           xend = c(0.3901 * 100, 
                    46.03),
           y = 0,
           yend = 8,
           colour = c("black", "red")) 

figure1

## Save the graph

ggsave(filename = "data/output/figure 1_percentage_votes_for_bolsonaro_1st_means.png",
       bg = "white",
       plot = figure1,
       width = 12,
       height = 7,
       dpi = 700)

## 3.2. Figure 2 -----------------------------------------------------------

## Data frame with only the second round results

secondround <- bolsonaro %>%
  ungroup() %>% 
  filter(round == 2) %>% 
  rename("second round" = "prop_votes") %>% 
  select(-round) 

## Plot the graph

figure2 <- secondround %>% 
  filter(state != "ZZ") %>% 
  select(`second round`,
         region,
         state,
         city) %>% 
  unique() %>%
  ggplot(aes(x = `second round` * 100))+
  geom_histogram(
    aes(y = (..count..)/sum(..count..) * 100),
    bins = 30,
    fill = "#3d405b",
    colour = "black")+
  scale_x_continuous(breaks = seq(0,100,20),
                     limits = c(0,101),
                     expand = c(0, 0)) +
  scale_y_continuous(breaks = seq(0,8,2),
                     limits = c(0,8.2),
                     expand = c(0, 0)) +
  labs(x = "% of votes for Bolsonaro (2nd round, 2018)",
       y = "Percent") +
  theme_minimal()+
  theme(legend.position = "bottom",
        plot.margin = unit(c(1,1,1,1.2),"cm"),
        plot.title = element_text(hjust = 0.5, 
                                  family = "Cambria",
                                  size = 11),
        plot.caption = element_text(family = "Cambria",
                                    size = 11),
        panel.grid.major = element_line(colour = "grey80"), 
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "grey80"),
        axis.ticks = element_line(colour = "grey80"),
        axis.title = element_text(family = "Cambria",
                                  size = 11,
                                  margin = unit(c(3, 0, 0, 0), "mm")),
        axis.text = element_text(family = "Cambria",
                                 size = 11,
                                 margin = unit(c(0, 0, 3, 0), "mm")),
        panel.border = element_blank(),
        legend.title = element_text(family = "Cambria",
                                    size = 11),
        legend.text = element_text(family = "Cambria",
                                   size = 11)) +
  guides(fill = guide_legend(title = "Region",
                             nrow = 1))+
  annotate("segment",
           x = c(0.4691 * 100,
                 55.13),
           xend = c(0.4691 * 100,
                    55.13),
           y = 0,
           yend = 8,
           colour = c("black","red"))

figure2

## Save the graph

ggsave(filename = "data/output/figure 2_percentage_votes_for_bolsonaro_2nd_means.png",
       bg = "white",
       plot = figure2,
       width = 12,
       height = 7,
       dpi = 700)

## 3.3. Figure 3 -----------------------------------------------------------

## Set the outlier values in the votes received by 
## Bolsonaro in the 1st and 2nd rounds

is_outlier <- function(x) {
  return(
    x < 0.05 |
      x > 0.90
  )
}

is_outlier2 <- function(x) {
  return(
    x < 0.05 |
      x > 0.83
  )
}

## Create a color palette

palette <- RColorBrewer::brewer.pal(9, 
                                   name = "Greens")[c(5,9)]

## Data frame with only the first round results

firstround <- bolsonaro %>%
  ungroup() %>% 
  filter(round == 1) %>% 
  rename("first round" = "prop_votes") %>% 
  select(-round) 

## Data frame with only the second round results

secondround <- bolsonaro %>%
  ungroup() %>% 
  filter(round == 2) %>% 
  rename("second round" = "prop_votes") %>% 
  select(city_ibge_code,
         `second round`) %>% 
  filter(!is.na(city_ibge_code)) %>% 
  unique() 

## Joinning the data

firstround <- left_join(firstround,
                        secondround)

## Plot the graph

figure3 <- firstround %>%
  filter(age_recoded == ">= 60 years") %>% 
  select(-age_recoded,
         -estimated_population_by_age) %>%
  unique() %>%
  mutate(outlier = ifelse(is_outlier(`second round`),
                          paste0(str_to_title(city), " (", state, ")"),
                          NA_character_)) %>%
  mutate(outlier2 = ifelse(is_outlier2(`first round`),
                           paste0(str_to_title(city), " (", state, ")"),
                           NA_character_)) %>%
  mutate(outlier = ifelse(outlier == "Carnaubeira Da Penha (PE)" |
                          outlier == "Capitão Gervásio Oliveira (PI)",
                          NA_character_,
                          ifelse(outlier == "Saltos Do Guaira (ZZ)",
                                 "Saltos do Guaira (ZZ)",
                                 outlier))) %>%
  mutate(outlier2 = ifelse(outlier2 == "Treze De Maio (SC)",
                           "Treze de Maio (SC)",
                           NA_character_)) %>%
  ggplot(aes(x = `first round` * 100,
             y = `second round` * 100,
             color = prop_age_group_recoded))+
  geom_point(size = 1.5) +
  ggrepel::geom_label_repel(aes(label = outlier),
                            size = 3,
                            family = "Cambria",
                            color = "black",
                            hjust = 0.5,
                            vjust = 0.5,
                            nudge_y = 1,
                            segment.alpha = 1,
                            segment.size = unit(0.2, "mm")) +
  ggrepel::geom_label_repel(
    aes(label = outlier2),
    size = 3,
    family = "Cambria",
    color = "black",
    hjust = 0.5,
    vjust = 0.5,
    nudge_y = -1,
    nudge_x = 1,
    segment.alpha = 1,
    segment.size = unit(0.2, "mm")) +
  scale_x_continuous(breaks = seq(0,100,20),
                     limits = c(0,100),
                     expand = c(0, 0)) +
  scale_y_continuous(breaks = seq(0,100,20),
                     limits = c(0,100),
                     expand = c(0, 0)) +
  labs(subtitle = "Population aged 60 years or over",
       x = "% of votes for Bolsonaro (1st round, 2018)",
       y = "% of votes for Bolsonaro (2nd round, 2018)") +
  theme_bw() +
  scale_color_manual(values = palette) +
  theme(legend.position = "bottom",
        plot.margin = unit(c(1,1,1,1.2),"cm"),
        plot.title = element_text(hjust = 0.5, 
                                  family = "Cambria",
                                  size = 11,
                                  margin = unit(c(0, 0, 5, 0), "mm")),
        plot.caption = element_text(family = "Cambria",
                                    size = 11),
        plot.subtitle = element_text(hjust = 0.75, 
                                     color = "grey40",
                                     family = "Cambria",
                                     size = 11,
                                     face = "bold"),
        panel.grid.major = element_line(colour = "grey80"),
        panel.grid.minor = element_line(colour = "grey80"),
        axis.line = element_line(colour = "grey80"),
        axis.ticks = element_line(colour = "grey80"),
        axis.title = element_text(family = "Cambria",
                                  size = 11,
                                  margin = unit(c(15, 0, 0, 0), "mm")),
        axis.text = element_text(family = "Cambria",
                                 size = 11,
                                 margin = unit(c(0, 0, 25, 0), "mm")),
        panel.border = element_rect(colour = "grey80"),
        legend.title = element_text(family = "Cambria",
                                    size = 11),
        legend.text = element_text(family = "Cambria",
                                   size = 11))+
  guides(color = guide_legend(title = "Predominance of the age group",
                              nrow = 1,
                              override.aes = list(
                                size = c(rep(3,2)))))

figure3

## Save the graph

ggsave(filename = "data/output/figure 3_percentage_votes_for_bolsonaro_by_age_60.png",
       plot = figure3,
       width = 12,
       height = 7,
       dpi = 700)

## 3.4. Figure 4 -----------------------------------------------------------

## Create a color palette

palette <- c("#91cf60",
            "#3d405b",
            "#81b29a",
            "#540b0e",
            "#ee964b")

## Defines outliers for Bolsonaro votes, cases 
## and deaths by Covid-19

is_outlier <- function(x) {
  
  return(x < 0.05 |
           x > 0.90)
  
}

is_outlier_cases <- function(x) {
  
  return(x > 50000)
  
}

is_outlier_deaths <- function(x) {
  
  return(x > 800)
  
}

## Data frame with only the first round results

firstround <- bolsonaro %>%
  ungroup() %>% 
  filter(round == 1) %>% 
  rename("first round" = "prop_votes") %>% 
  select(-round) 

## Data frame with only the second round results

secondround <- bolsonaro %>%
  ungroup() %>% 
  filter(round == 2) %>% 
  rename("second round" = "prop_votes") %>% 
  select(city_ibge_code,
         `second round`) %>% 
  filter(!is.na(city_ibge_code)) %>% 
  unique() 

## Joinning the data

firstround <- left_join(firstround,
                        secondround) %>%
  mutate(region = ifelse(region == "NORTE",
                         "NORTH",
                         ifelse(region == "SUL",
                                "SOUTH",
                                ifelse(region == "SUDESTE",
                                       "SOUTHEAST",
                                       ifelse(region == "NORDESTE",
                                              "NORTHEAST",
                                              ifelse(region == "CENTRO OESTE",
                                                     "CENTRAL WEST",
                                                     NA)))))) %>% 
  mutate(region = str_to_title(region),
         region = factor(region,
                         levels = c("Central West",
                                    "North",
                                    "Northeast",
                                    "South",
                                    "Southeast"))) 

## Plot the graph

figure4 <- firstround %>% 
  filter(region != "Exterior") %>% 
  select(`first round`, 
         `second round`, 
         region,
         state,
         city) %>% 
  unique() %>% 
  mutate(outlier = ifelse(is_outlier(`second round`),
                          paste0(str_to_title(city), " (", state, ")"),
                          NA_character_)) %>% 
  mutate(outlier = ifelse(outlier == "Carnaubeira Da Penha (PE)" |
                            outlier == "Capitão Gervásio Oliveira (PI)",
                          NA_character_,
                          ifelse(outlier == "Saltos Do Guaira (ZZ)",
                                 "Saltos do Guaira (ZZ)",
                                 outlier))) %>% 
  ggplot(aes(x = `first round` * 100,
             y = `second round` * 100,
             color = region))+
  geom_point(size = 1.5) +
  ggrepel::geom_label_repel(aes(label = outlier),
                            size = 3,
                            family = "Cambria",
                            color = "black",
                            hjust = 0.5,
                            vjust = 0.5,
                            nudge_y = 0,
                            segment.alpha = 1, 
                            segment.size = unit(0.2, "mm")) +
  scale_x_continuous(breaks = seq(0,100,20),
                     limits = c(0,100),
                     expand = c(0, 0)) +
  scale_y_continuous(breaks=seq(0,100,20),
                     limits = c(0,100),
                     expand = c(0, 0)) +
  labs(
    x = "% of votes for Bolsonaro (1st round, 2018)",
    y = "% of votes for Bolsonaro (2nd round, 2018)") +
  theme_bw() +
  scale_color_manual(values = palette)+
  theme(legend.position = "bottom",
        plot.margin = unit(c(1,1,1,1.2),"cm"),
        plot.title = element_text(hjust = 0.5,
                                  size = 11,
                                  family = "Cambria"),
        plot.caption = element_text(size = 11,
                                    family = "Cambria"),
        panel.grid.major = element_line(colour = "grey80"), 
        panel.grid.minor = element_line(colour = "grey80"),
        axis.line = element_line(colour = "grey80"),
        axis.ticks = element_line(colour = "grey80"),
        axis.title = element_text(size = 11,
                                  family = "Cambria"),
        axis.text = element_text(size = 11,
                                 family = "Cambria"),
        panel.border = element_rect(colour = "grey80"),
        legend.title = element_text(family = "Cambria",
                                    size = 11),
        legend.text = element_text(size = 11,
                                   family = "Cambria")) +
  guides(color = guide_legend(title = "Region",
                              nrow = 1,
                              override.aes = list(size = c(rep(3,5)))))

figure4

## Save the graph

ggsave(filename = "data/output/figure 4_percentage_votes_for_bolsonaro.png",
       plot = figure4,
       width = 12,
       height = 7,
       dpi = 700)

## 3.5. Figure 5 -----------------------------------------------------------

## Plot the graph

figure5 <- bolsonaro %>%
  ungroup() %>% 
  mutate(region = ifelse(region == "NORTE",
                         "NORTH",
                         ifelse(region == "SUL",
                                "SOUTH",
                                ifelse(region == "SUDESTE",
                                       "SOUTHEAST",
                                       ifelse(region == "NORDESTE",
                                              "NORTHEAST",
                                              ifelse(region == "CENTRO OESTE",
                                                     "CENTRAL WEST",
                                                     NA)))))) %>% 
  mutate(region = str_to_title(region),
         region = factor(region,
                         levels = c("Central West",
                                    "North",
                                    "Northeast",
                                    "South",
                                    "Southeast"))) %>%  
  filter(state != "ZZ") %>% 
  filter(round == 1) %>% 
  select(region,
         state,
         city,
         last_available_confirmed_per_100k_inhabitants,
         prop_votes) %>% 
  unique() %>% 
  mutate(outlier = ifelse(is_outlier_cases(last_available_confirmed_per_100k_inhabitants),
                          paste0(str_to_title(city), " (", state, ")"),
                          NA_character_)) %>% 
  mutate(outlier = ifelse(outlier == "Santa Cecília Do Sul (RS)",
                          "Santa Cecília do Sul (RS)",
                          outlier)) %>% 
  ggplot(aes(x = prop_votes * 100,
             y = last_available_confirmed_per_100k_inhabitants,
             color = region))+
  geom_point(size = 1.5, alpha = 0.8) +
  ggrepel::geom_label_repel(aes(label = outlier),
                            size = 3,
                            family = "Cambria",
                            color = "black",
                            hjust = 0.5,
                            vjust = 0.5,
                            nudge_y = 0,
                            segment.alpha = 1,
                            segment.size = unit(0.2, "mm")) +
  scale_x_continuous(breaks = seq(0,100,20),
                     limits = c(0,100),
                     expand = c(0, 0)) +
  scale_y_continuous(
    breaks = seq(0,60000,10000),
    limits = c(0,60000),
    labels = comma_format(big.mark = ",",
                          decimal.mark = "."),
    expand = c(0, 0)) +
  labs(x = "% of votes for Bolsonaro (1st round, 2018)",
       y = "Cases/100,000 inhabitants") +
  theme_bw() +
  scale_color_manual(values = palette) +
  theme(legend.position = "bottom",
        plot.margin = unit(c(1,1,1,1.2),"cm"),
        plot.title = element_text(hjust = 0.5, 
                                  family = "Cambria",
                                  size = 11),
        plot.caption = element_text(family = "Cambria",
                                    size = 11),
        panel.grid.major = element_line(colour = "grey80"), 
        panel.grid.minor = element_line(colour = "grey80"),
        axis.line = element_line(colour = "grey80"),
        axis.ticks = element_line(colour = "grey80"),
        panel.border = element_rect(colour = "grey80"),
        axis.title = element_text(family = "Cambria",
                                  size = 11),
        axis.text = element_text(family = "Cambria",
                                 size = 11),
        legend.title = element_text(family = "Cambria",
                                    size = 11),
        legend.text = element_text(family = "Cambria",
                                   size = 11)) +
  guides(color = guide_legend(title = "Region",
                              nrow = 1,
                              override.aes = list(size = c(rep(3,5)))))

figure5

## Save the graph

ggsave(filename = "data/output/figure 5_percentage_votes_for_bolsonaro 1st_and_cases.png",
       plot = figure5,
       width = 12,
       height = 7,
       dpi = 700)

## 3.6. Figure 6 -----------------------------------------------------------

## Plot the graph

figure6 <- bolsonaro %>%
  ungroup() %>% 
  mutate(region = ifelse(region == "NORTE",
                         "NORTH",
                         ifelse(region == "SUL",
                                "SOUTH",
                                ifelse(region == "SUDESTE",
                                       "SOUTHEAST",
                                       ifelse(region == "NORDESTE",
                                              "NORTHEAST",
                                              ifelse(region == "CENTRO OESTE",
                                                     "CENTRAL WEST",
                                                     NA)))))) %>% 
  mutate(region = str_to_title(region),
         region = factor(region,
                         levels = c("Central West",
                                    "North",
                                    "Northeast",
                                    "South",
                                    "Southeast"))) %>% 
  filter(state != "ZZ") %>% 
  filter(round == 1) %>% 
  select(region,
         state,
         city,
         last_available_deaths_per_100k_inhabitans,
         prop_votes) %>% 
  unique() %>% 
  mutate(outlier = ifelse(is_outlier_deaths(last_available_deaths_per_100k_inhabitans),
                          paste0(str_to_title(city), " (", state, ")"),
                          NA_character_)) %>% 
  mutate(outlier = ifelse(outlier == "Santa Clara D Oeste (SP)",
                          "Santa Clara D'Oeste (SP)",
                          ifelse(outlier == "Lago Dos Rodrigues (MA)",
                                 "Lago dos Rodrigues (MA)",
                                 outlier))) %>% 
  ggplot(aes(x = prop_votes * 100,
             y = last_available_deaths_per_100k_inhabitans,
             color = region))+
  geom_point(size = 1.5, alpha = 0.8) +
  ggrepel::geom_label_repel(aes(label = outlier),
                            size = 3,
                            family = "Cambria",
                            color = "black",
                            hjust = 0.5,
                            vjust = 0.5,
                            nudge_y = 0,
                            segment.alpha = 1,
                            segment.size = unit(0.2, "mm")) +
  scale_x_continuous(breaks = seq(0,100,20),
                     limits = c(0,100),
                     expand = c(0, 0)) +
  scale_y_continuous(
    breaks = seq(0,1000,200),
    limits = c(0,1000),
    expand = c(0, 0)) +
  labs(x = "% of votes for Bolsonaro (1st round, 2018)",
       y = "Deaths/100,000 inhabitants") +
  theme_bw() +
  scale_color_manual(values = palette) +
  theme(legend.position = "bottom",
        plot.margin = unit(c(1,1,1,1.2),"cm"),
        plot.title = element_text(hjust = 0.5, 
                                  family = "Cambria",
                                  size = 11),
        plot.caption = element_text(family = "Cambria",
                                    size = 11),
        panel.grid.major = element_line(colour = "grey80"), 
        panel.grid.minor = element_line(colour = "grey80"),
        axis.line = element_line(colour = "grey80"),
        axis.ticks = element_line(colour = "grey80"),
        axis.title = element_text(family = "Cambria",
                                  size = 11),
        axis.text = element_text(family = "Cambria",
                                 size = 11),
        panel.border = element_rect(colour = "grey80"),
        legend.title = element_text(family = "Cambria",
                                    size = 11),
        legend.text = element_text(family = "Cambria",
                                   size = 11)) +
  guides(color = guide_legend(title = "Region",
                              nrow = 1,
                              override.aes = list(size = c(rep(3,5)))))

figure6

## Save the graph

ggsave(filename = "data/output/figure 6_percentage_votes_for_bolsonaro 1st_and_deaths.png",
       plot = figure6,
       width = 12,
       height = 7,
       dpi = 700)

## 3.7. Figure 7 -----------------------------------------------------------

#area_pop <- ReadRDS("data/output/area_pop_covid_mun_20220327.rds")

## Creating plot 
pop_g <-
  area_pop %>% select(
    category_pop,
    n_category_pop,
    last_available_confirmed_per_100k_inhabitants,
    last_available_deaths_per_100k_inhabitans
  ) %>%
  rename("Cases per 100,000 people" = "last_available_confirmed_per_100k_inhabitants",
         "Deaths per 100,000 people" = "last_available_deaths_per_100k_inhabitans") %>%
  gather("type",
         "value",
         "Cases per 100,000 people":"Deaths per 100,000 people") 

pop_plot <- ggplot(pop_g, aes(x = category_pop, y = value)) +
  geom_violin(width = .6,
              alpha = .7,
              show.legend = FALSE) +
  geom_boxplot(
    width = .3,
    cex = .5,
    position = position_dodge(.4),
    show.legend = FALSE,
    notchwidth = 0.3
  ) +
  geom_text(data = subset( pop_g,type == "Cases per 100,000 people"),
            aes(y = 43000, label = n_category_pop),family="Cambria") +
  facet_wrap(. ~ type, nrow = 2, scales = "free_y") +
  theme_bw() +
  labs(x = "Number of inhabitants of the municipality", y = "")+
  theme(text = element_text(family = "Cambria",size=11))

## Saving Figure 7
ggsave(pop_plot,
  filename = "data/output/figure7_city_size_cases_deaths.png",
  width = 24,
  height = 24,
  units = "cm",
  path = "Figures",
  dpi = 400
)


## 3.8. Figure 8 -----------------------------------------------------------


## Generating plot
area_pop_g <-
  area_pop %>% select(
    quartile_dens,
    last_available_confirmed_per_100k_inhabitants,
    last_available_deaths_per_100k_inhabitans
  ) %>%
  rename("Cases per 100,000 people" = "last_available_confirmed_per_100k_inhabitants",
         "Deaths per 100,000 people" = "last_available_deaths_per_100k_inhabitans") %>%
  gather("type",
         "value",
         "Cases per 100,000 people":"Deaths per 100,000 people") %>%

ggplot(., aes(x = quartile_dens, y = value)) +
  geom_violin(width = .6,
              alpha = .7,
              show.legend = FALSE) +
  geom_boxplot(
    width = .3,
    cex = .5,
    position = position_dodge(.4),
    show.legend = FALSE,
    notchwidth = 0.3
  ) +
  facet_wrap(. ~ type, nrow = 2, scales = "free_y") +
  theme_bw() +
  labs(x = "Population Density (km²) by quartile", y = "")+
  theme(text = element_text(family = "Cambria",size=11))

## Saving the plot
ggsave(area_pop_g,
  filename = "data/output/figure8_city_density_cases_deaths.png",
  width = 24,
  height = 24,
  units = "cm",
  path = "Figures",
  dpi = 400
)

